This short Markdown file shows how to query the WsprDaemon Timescale Database using R and SQL.
library(RPostgres)
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(DT)
library(knitr)
library(fuzzyjoin)
library(tmap)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
data(World)
dt_options <- list(pageLength=7, scrollX = "400px")
The settings are available over here.
db_name <- "wsprnet"
db_host <- "logs2.wsprdaemon.org"
db_user <- "wdread"
db_password <- "JTWSPR2008"
db_con <- dbConnect(RPostgres::Postgres(),
dbname = db_name,
host = db_host,
user = db_user,
password = db_password)
Check if it worked:
dbListTables(db_con)
## [1] "spots"
Yes!
Let’s try an example based on SQL from the WsprDaemon Timescale Databases Notes (Version 2.0 November 2020):
res <- dbSendQuery(db_con,
"SELECT * FROM spots
ORDER BY wd_time DESC
LIMIT 20;")
dbFetch(res) %>%
datatable(options = dt_options)
Yes – works.
dbClearResult(res)
Let’s try one day’s worth of data – reception reports of M0INF. There’s a SQL quirk below. Variable names which aren’t all in lower case (such as CallSign) have to be referenced using double quotation marks; the quotes are escaped using the backslash. String literals like a callsign use single quotes.
one_day <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE \"CallSign\" = 'M0INF'
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-04T00:00:00Z';")
This fetches the data to a data frame:
the_dat <- dbFetch(one_day)
dbClearResult(one_day)
the_dat %>%
datatable(options = dt_options)
Now plot:
the_dat %>%
ggplot(aes(wd_time, distance)) +
geom_point() +
geom_smooth(method = "gam",
formula = y ~ s(x, k = 20)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports of M0INF",
subtitle = "3 Jan 2021, 20m band") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
Let’s do the same again; however, now with reception reports both by and of M0INF.
one_day_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-04T00:00:00Z';")
the_dat_send_rec <- dbFetch(one_day_send_receive)
dbClearResult(one_day_send_receive)
the_dat_send_rec <- the_dat_send_rec %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
The graph is a bit crowded so the y-axis is on the \(\log_{10}\) scale.
the_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
stat_smooth(geom="line",
alpha = 0.5, size = 0.8,
method = "gam",
formula = y ~ s(x, k = 25)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
Which of those reports are over 5000 km?
the_dat_send_rec %>%
filter(distance > 5000) %>%
group_by(CallSign, Reporter, distance) %>%
summarise(Spots = n()) %>%
arrange(desc(Spots)) %>%
kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
| CallSign | Reporter | distance | Spots |
|---|---|---|---|
| VK3MO | M0INF | 16880 | 7 |
| M0INF | KD2OM | 5635 | 2 |
| M0INF | PT2FHC | 8786 | 1 |
And which are under 100 km?
the_dat_send_rec %>%
filter(distance < 100) %>%
group_by(CallSign, Reporter, distance) %>%
summarise(Spots = n()) %>%
arrange(desc(Spots)) %>%
kable()
## `summarise()` regrouping output by 'CallSign', 'Reporter' (override with `.groups` argument)
| CallSign | Reporter | distance | Spots |
|---|---|---|---|
| M0INF | GB0SNB/SDR | 32 | 58 |
two_day_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-03T00:00:00Z'
AND wd_time < '2021-01-05T00:00:00Z';")
two_dat_send_rec <- dbFetch(two_day_send_receive)
dbClearResult(two_day_send_receive)
two_dat_send_rec <- two_dat_send_rec %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
two_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
stat_smooth(geom="line",
alpha = 0.5, size = 0.8,
method = "gam",
formula = y ~ s(x, k = 25)) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3-4 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
The red curve doesn’t make sense since there is missing data which should essentially be zero. For now, let’s remove it…
two_dat_send_rec %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "3-4 Jan 2021, 20m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
day_40m_send_receive <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"CallSign\" = 'M0INF' OR \"Reporter\" = 'M0INF')
AND wd_time >= '2021-01-05T00:00:00Z'
AND wd_time < '2021-01-06T00:00:00Z';")
day_40m_send_receive_dat <- dbFetch(day_40m_send_receive)
dbClearResult(day_40m_send_receive)
day_40m_send_receive_dat <- day_40m_send_receive_dat %>%
mutate(Direction = ifelse(Reporter == "M0INF",
"Rx by M0INF",
"Tx by M0INF"))
day_40m_send_receive_dat %>%
ggplot(aes(wd_time, distance, colour = Direction)) +
geom_point(alpha = 0.5, size = 1) +
labs(x = "Time", y = "Distance (km)",
title = "WSPR reports",
subtitle = "5 Jan 2021, 40m band") +
scale_y_continuous(trans = "log10") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H") +
theme(legend.position="bottom")
First, look at data where there’s more than one report.
dat_sent_many <- the_dat %>%
group_by(Reporter) %>%
summarise(spots = n()) %>%
arrange(desc(spots)) %>%
slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many %>%
kable()
| Reporter | spots |
|---|---|
| GB0SNB/SDR | 58 |
| IW2NKE | 22 |
| YU/G8ZMF | 10 |
| SA2KHG | 8 |
| OE1WYC | 7 |
| IZ7VHF/RX | 6 |
| UA3245SWL | 6 |
the_dat %>%
mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
filter(Reporter %in% unique(dat_sent_many$Reporter)) %>%
ggplot(aes(wd_time, dB, colour = ReportDist)) +
geom_smooth(span = 1, se = FALSE) +
labs(x = "Time", y = "Signal report (dB)",
title = "WSPR reports (20m) – signal strength",
colour = "Reporter") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Try again for latest 40m data:
one_day_40m <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE \"CallSign\" = 'M0INF'
AND wd_time >= '2021-01-05T00:00:00Z'
AND wd_time < '2021-01-06T00:00:00Z';")
dat_40m <- dbFetch(one_day_40m)
dbClearResult(one_day_40m)
dat_sent_many_40 <- dat_40m %>%
group_by(Reporter) %>%
summarise(spots = n()) %>%
arrange(desc(spots)) %>%
#filter(spots >= 5)
slice_head(n = 7)
## `summarise()` ungrouping output (override with `.groups` argument)
dat_sent_many_40 %>%
kable()
| Reporter | spots |
|---|---|
| OE9HLH | 36 |
| OE9GHV | 33 |
| HB9TMC | 32 |
| F4GFZ | 22 |
| DK8FT/A | 21 |
| IW2NKE | 21 |
| DK2DB | 20 |
dat_40m %>%
mutate(ReportDist = paste0(Reporter, " (", distance, "km)")) %>%
filter(Reporter %in% unique(dat_sent_many_40$Reporter)) %>%
ggplot(aes(wd_time, dB, colour = ReportDist)) +
geom_smooth(span = 2, se = FALSE, size = 0.8) +
geom_point(size = 0.6, alpha = 0.5) +
labs(x = "Time", y = "dB",
title = "WSPR reports (40m) – signal strength",
subtitle = "A selection of reporters on 5 Jan 2021",
colour = "Reporter") +
scale_x_datetime(date_breaks = "2 hour", date_labels = "%H")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Here are the WSPR frequencies, for labeling the plot in a moment.
wspr_bands <- tibble(
MHz = c(
0.136,
0.4742,
1.8366,
3.5686,
5.2872,
5364.7,
7.0386,
10.1387,
14.0956,
18.1046,
21.0946,
24.9246,
28.1246,
50.293,
70.091,
144.489,
432.300,
1296.500
),
m = 299.792458 / MHz,
wavelength_text = ifelse(m >= 1,
paste(round(m, 0), "m"),
paste(round(m*100, 0), "cm"))
) %>%
arrange(MHz)
wspr_bands %>% kable()
| MHz | m | wavelength_text |
|---|---|---|
| 0.1360 | 2204.3563088 | 2204 m |
| 0.4742 | 632.2067862 | 632 m |
| 1.8366 | 163.2323086 | 163 m |
| 3.5686 | 84.0084229 | 84 m |
| 5.2872 | 56.7015543 | 57 m |
| 7.0386 | 42.5926261 | 43 m |
| 10.1387 | 29.5691221 | 30 m |
| 14.0956 | 21.2685134 | 21 m |
| 18.1046 | 16.5589109 | 17 m |
| 21.0946 | 14.2118105 | 14 m |
| 24.9246 | 12.0279747 | 12 m |
| 28.1246 | 10.6594390 | 11 m |
| 50.2930 | 5.9609182 | 6 m |
| 70.0910 | 4.2771891 | 4 m |
| 144.4890 | 2.0748462 | 2 m |
| 432.3000 | 0.6934824 | 69 cm |
| 1296.5000 | 0.2312321 | 23 cm |
| 5364.7000 | 0.0558824 | 6 cm |
Grab the data:
last_whispers <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"ReporterGrid\" LIKE 'IO91%'
OR \"Grid\" LIKE 'IO91%')
AND wd_time > NOW() - INTERVAL '1 hour';")
the_dat <- dbFetch(last_whispers)
dbClearResult(last_whispers)
nrow(the_dat)
## [1] 3340
Group into WSPR bands:
joined_dat <- the_dat %>%
difference_left_join(wspr_bands,
by = "MHz",
max_dist = 0.2,
distance_col = "dist_band_start") %>%
mutate(MHz.y = as.factor(MHz.y))
Check how many kHz off the match is (also look for NAs):
summary(joined_dat$dist_band_start*1000)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.364 1.475 1.508 2.355 1.550 79.091
Plot:
joined_dat %>%
ggplot(aes(distance, MHz.y, colour = MHz.y)) +
geom_jitter(height = 0.05, alpha = 1/3, size = 1) +
xlab("Distance (km)") +
ylab("Freq (MHz)") +
labs(title = "WSPR spots IO91 (sent or received)",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time))) +
theme(legend.position = "none")
Try again, but this time separating the transmissions and receptions in IO91.
joined_dat <- joined_dat %>%
mutate(direction = ifelse(grepl(
pattern = "IO91",
x = Grid,
ignore.case = TRUE
), "Sent from IO91", "Received in IO91"))
joined_dat %>%
ggplot(aes(distance, MHz.y, colour = direction)) +
geom_point(position = position_jitterdodge(jitter.width = .1,
jitter.height = 0,
dodge.width = 0.4),
alpha = 1/3, size = 1) +
xlab("Distance (km)") +
ylab("Freq (MHz)") +
labs(title = "WSPR spots IO91 (sent or received)",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time)),
colour = "Direction")
Let’s do the same again but also showing the direction, for transmissions from IO91 only.
tx_only <- joined_dat %>%
filter(grepl(pattern = "IO91", x = Grid, ignore.case = TRUE))
linear_scale_polar <- tx_only %>%
ggplot(aes(azimuth, distance, colour = MHz.y)) +
geom_point(alpha = 0.5) +
labs(
title = "WSPR spots IO91",
subtitle = paste0("From ", min(the_dat$wd_time),
" to ", max(the_dat$wd_time)),
colour = "Freq (MHz)",
x = NULL,
y = "Distance"
) +
scale_x_continuous(
limits = c(0, 360),
breaks = seq(0, 315, by = 45),
minor_breaks = seq(0, 360, by = 15)
) +
coord_polar()
linear_scale_polar
linear_scale_polar +
scale_y_continuous(trans = "log10")
## Warning: Transformation introduced infinite values in continuous y-axis
tx_only %>%
filter(distance > 8000) %>%
select(CallSign, Reporter, distance, azimuth, MHz.x) %>%
kable()
| CallSign | Reporter | distance | azimuth | MHz.x |
|---|---|---|---|---|
| G4LRP | PT2FHC | 8731 | 226 | 14.09706 |
IO91_48hrs <- dbSendQuery(db_con,
"SELECT * FROM spots
WHERE (\"ReporterGrid\" LIKE 'IO91%'
OR \"Grid\" LIKE 'IO91%')
AND wd_time >= '2021-01-05T00:00:00Z'
AND wd_time < '2021-01-07T00:00:00Z';")
IO91_48hrs_dat <- dbFetch(IO91_48hrs)
dbClearResult(IO91_48hrs)
nrow(IO91_48hrs_dat)
## [1] 189067
IO91_48hrs_dat_bands <- IO91_48hrs_dat %>%
difference_left_join(wspr_bands,
by = "MHz",
max_dist = 0.1,
distance_col = "dist_band_start") %>%
mutate(
MHz.y = as.factor(MHz.y),
direction = ifelse(
grepl(
pattern = "IO91",
x = Grid,
ignore.case = TRUE
),
"From IO91",
"To IO91"
)
)
IO91_48hrs_dat_bands %>%
filter(dist_band_start > 5/1000) %>%
select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
arrange(desc(dist_band_start)) %>%
datatable(options = dt_options)
IO91_48hrs_dat_bands %>%
filter(is.na(MHz.y)) %>%
select(CallSign, Reporter, MHz.x, MHz.y, dist_band_start) %>%
datatable(options = dt_options)
summary(IO91_48hrs_dat_bands$dist_band_start*1000)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.362 1.456 1.505 2.619 1.555 79.092 2
IO91_48hrs_dat_bands %>%
filter(!is.na(MHz.y)) %>%
ggplot(aes(x = distance, fill = MHz.y)) +
geom_histogram(aes(y=..density..), binwidth = 500, position = "identity", alpha = 0.5) +
facet_grid(rows = vars(direction)) +
labs(x = "Distance (km)", y = "Mass", fill = "Freq (MHz)")
date_format <- "%d %b %Y (%H:%m)"
IO91_48hrs_dat_bands %>%
filter(!is.na(MHz.y)) %>%
ggplot(aes(x = distance, fill = MHz.y)) +
geom_histogram(
#aes(y = ..density..),
binwidth = 200,
position = "identity",
) +
facet_grid(cols = vars(direction), rows = vars(MHz.y)) +
labs(
x = "Distance (km)",
y = "Mass",
fill = "Freq (MHz)",
title = "WSPR reports to/from IO91 by band",
subtitle = paste0(format(
min(IO91_48hrs_dat_bands$wd_time), date_format
), " to ",
format(
max(IO91_48hrs_dat_bands$wd_time), date_format
))
) +
theme(legend.position = "none")
IO91_48hrs_dat_bands %>%
filter(!is.na(MHz.y) & direction == "From IO91") %>%
ggplot(aes(azimuth, y = distance)) +
geom_point() +
facet_grid(rows = vars(MHz.y)) +
labs(
x = "Distance (km)",
y = "Mass",
fill = "Freq (MHz)",
title = "WSPR reports from IO91 by band",
subtitle = paste0(format(
min(IO91_48hrs_dat_bands$wd_time), date_format
), " to ",
format(
max(IO91_48hrs_dat_bands$wd_time), date_format
))
) +
scale_x_continuous(
limits = c(0, 360),
breaks = seq(0, 315, by = 45),
minor_breaks = seq(0, 360, by = 15)
) +
coord_polar()
IO91_48hrs_dat_bands_sf <- IO91_48hrs_dat_bands %>%
st_as_sf(coords = c("wd_rx_lon", "wd_rx_lat"),
crs = 4326)
This step is rather slow…
t1 <- Sys.time()
IO91_48hrs_dat_bands_sf %>%
filter(!is.na(MHz.y)) %>%
ggplot() +
geom_sf(data = World) +
geom_sf(aes(colour = MHz.y)) +
facet_grid(rows = vars(MHz.y)) +
theme_classic() +
labs(
colour = "Freq (MHz)",
title = "WSPR reports from IO91 by band",
subtitle = paste0(format(
min(IO91_48hrs_dat_bands$wd_time), date_format
), " to ",
format(
max(IO91_48hrs_dat_bands$wd_time), date_format
))
) +
theme(legend.position = "none")
t2 <- Sys.time()
t2 - t1
## Time difference of 1.945946 mins